library(sf)
library(readxl)
library(tidyverse)
library(mapview)
library(gtfstools)
library(stringr)
library(gt)
mapviewOptions(fgb = FALSE)Calculating Track Mileage by UZA
Excluding CapeFlyer Service
UZA Work
We write a function to grab the files from the Census Bureau site and perform light filtering and manipulation.
sf_from_zip <- function(URL) {
# based on:
# https://stackoverflow.com/questions/59740419/unzipping-and-reading-shape-file-in-r-without-rgdal-installed
# Load local temp files
temp_0 <- tempfile()
temp_1 <- tempfile()
# download the zip, put it in the first temp directory
download.file(URL, temp_0)
# Unzip and place in temp_1
unzip(zipfile = temp_0, exdir = temp_1)
# Read the shapefile.
geom_sf <- sf::read_sf(temp_1)
out <- geom_sf
}# Record the URLs.
UZA10_URL <- "https://www2.census.gov/geo/tiger/TIGER2020/UAC/tl_2020_us_uac10.zip"
UZA20_URL <- "https://www2.census.gov/geo/tiger/TIGER2020/UAC/tl_2020_us_uac20.zip"
# Obtain the data.
UZA10 <- sf_from_zip(UZA10_URL)
UZA20 <- sf_from_zip(UZA20_URL)
# Read data from population file:
# https://www.transit.dot.gov/ntd/2020-census-changes-uzapopulation
UZA20_pop <- read_xlsx("../data/base/UZA_CHANGES_1990-2020_2_2.xlsx",
sheet = "UZA 2020 Master",
col_types = "text") |>
janitor::clean_names() |>
mutate(x2020_uace = str_pad(x2020_uace, side = "left", pad = "0", 5))
# Transform to state coordinate system.
UZA10 <- UZA10 |> sf::st_transform(26986)
UZA20 <- UZA20 |> sf::st_transform(26986)
NE <- c("MA", "CT", "RI", "VT", "NH", "ME")
NE_collapse <- paste0(NE,collapse = "|")
# Clean the UZAs for processing. We want to separate out
# clusters (C) and Urbanized Areas (U)
UZA10_NE <- UZA10 |>
filter(str_detect(NAME10, NE_collapse)) |>
mutate(UZA_type = if_else(UATYP10 == "C",
"Census 2010 Cluster",
"Census 2010 Urbanized Area"))
# There are only Urban Areas in 2020.
UZA20_NE <- UZA20 |>
filter(str_detect(NAME20, NE_collapse)) |>
left_join(UZA20_pop |> select(x2020_uace, x2020_population), by = c("GEOID20" = "x2020_uace")) |>
mutate(UZA_type = if_else(!is.na(x2020_population),
"Pop: GTE 50k",
"Pop: LT 50k"))
saveRDS(UZA10_NE, "../data/processed/UZA10_NE.rds")
saveRDS(UZA20_NE, "../data/processed/UZA20_NE.rds")CR Line Work
We start by downloading the last Summer 2022 GTFS file from the MBTA’s GTFS archive. This file contains information for the CapeFlyer and has the recent Foxboro Branch.
# This is the last Summer 2022 GTFS schedule.
# We chose this because it contains the CapeFlyer if we need it.
gtfs_summ22 <- gtfstools::read_gtfs("https://cdn.mbtace.com/archive/20220812.zip")
# Convert shapes to MA system.
gtfs_summ22_shp <- convert_shapes_to_sf(gtfs_summ22) |>
st_transform(26986)
# Attach them to our route_ids so we can have hints about what each shape is.
gtfs_summ22_shp <-
inner_join(gtfs_summ22_shp,
gtfs_summ22$trips |> distinct(route_id, shape_id),
by = "shape_id")We need to filter the shapes for commuter rail lines. We can view the shapes to confirm that this matches the system as we know it.
gtfs_summ22_shp_filt <- gtfs_summ22_shp |>
filter(str_detect(route_id, "CR-")) # Removed CapeFlyer
gtfs_summ22_shp_filt |> mapview(zcol = "route_id", color = viridis::turbo)The shapes representing some of the same tracks are very close but not exactly the same. If not addressed, this would cause an inflation in directional route miles. We can buffer the routes slightly to coalesce similar track segments.
Zoom into a station like Readville to see the effects of the buffer on the final result. You’ll notice a “cupping” where we would really want something resembling an “X” through the junction.
# Buffer out by 20m, then in by -19 meters -1*(20-1). A visual inspection suggested that 20m was enough distance to capture nearby alignments.
d <- 20
gtfs_summ22_shp_filt_buff <- gtfs_summ22_shp_filt |>
st_buffer(d, endCapStyle = "SQUARE", joinStyle = "ROUND") |>
summarize() |>
st_buffer(-1 * (d-1), endCapStyle = "SQUARE", joinStyle = "ROUND")
mapview(gtfs_summ22_shp_filt_buff) +
mapview(gtfs_summ22_shp_filt,
zcol = "route_id",
color = viridis::turbo)We take the outside line of the buffer which gives us an approximation of the length of the routes. In this case, because we are concerned with directional route miles, each line can be thought of as one direction of the route, meaning we do not have to divide by two to get the length of the centerline.
gtfs_summ22_shp_line <- gtfs_summ22_shp_filt_buff |> st_cast(to = "MULTILINESTRING")saveRDS(gtfs_summ22_shp_line,
"../data/processed/gtfs_summ22_shp_line_noCF.rds")Perform Intersections
Read in the pre-generated datasets.
# Read in trains
trains_pass <- readRDS("../data/processed/gtfs_summ22_shp_line_noCF.rds")
# Read in UZA
UZA10_NE <- readRDS("../data/processed/UZA10_NE.rds")
UZA20_NE <- readRDS("../data/processed/UZA20_NE.rds")
# Create labels for helpful hovering.
labs10 <- UZA10_NE$NAME10
labs20 <- UZA20_NE$NAME20We perform an intersection over each of the datasets.
trains_pass_UZA10 <- st_intersection(trains_pass, UZA10_NE)
trains_pass_UZA20 <- st_intersection(trains_pass, UZA20_NE)We map the datasets, showing only UZAs in MA.
The 2010 UZA boundaries are also shown over the modern alignments. This can be useful to see how the boundaries have changed in relation to the current alignments. Notice the new encroachment of the Providence UZA into what was once the Boston UZA, which now captures some of the Franklin Line.